The `re` module provides regular expression matching operations similar to those found in Perl.
| expression | description |
|---|---|
| `^` | Matches the beginning of a line |
| `$` | Matches the end of the line |
| `.` | Matches any character |
| `\s` | Matches whitespace |
| `\S` | Matches any non-whitespace character |
| `*` | Repeats a character zero or more times |
| `*?` | Repeats a character zero or more times (non-greedy) |
| `+` | Repeats a character one or more times |
| `+?` | Repeats a character one or more times (non-greedy) |
| `[aeiou]` | Matches a single character in the listed set |
| `[^XYZ]` | Matches a single character not in the listed set |
| `[a-z0-9]` | The set of characters can include a range |
| `(` | Indicates where string extraction is to start |
| `)` | Indicates where string extraction is to end |
| method | description |
|---|---|
| `re.findall(pattern, string)` | returns a list of strings |
| `re.search(pattern, string)` | returns a Match object (which is truthy), or `None` if there is no match |
| `re.split(pattern, string, maxsplit=0)` | returns a list of strings |
| `re.sub(pattern, substitute, string)` | returns a string |
import re

alphabet = 'abcdefghijklmnopqrstuvwxyz'

# A character class matches exactly one character, so each vowel is
# reported as its own match.
re.findall(r'[aeiou]', alphabet)
# ['a', 'e', 'i', 'o', 'u']

# Greedy `.+` consumes as much as it can: the single match runs from the
# first vowel to the last vowel in the string.
re.findall(r'[aeiou].+[aeiou]', alphabet)  # greedy
# ['abcdefghijklmnopqrstu']

# Non-greedy `.+?` stops at the nearest following vowel instead.
re.findall(r'[aeiou].+?[aeiou]', alphabet)  # non-greedy
# ['abcde', 'ijklmno']

import re
# findall scans left to right and collects every non-overlapping match.
re.findall(r'[aeiou]', 'abcde')  # returns a list of all sub-strings matching the regular expression
# ['a', 'e']

# The greedy form swallows everything up to the last colon in one match.
re.findall(r'.+:', 'a:b:c:d:e')  # greedy (prefers the longest match)
# ['a:b:c:d:']

# The non-greedy form ends each match at the first colon it reaches.
re.findall(r'.+?:', 'a:b:c:d:e')  # non-greedy (prefers the shortest match)
# ['a:', 'b:', 'c:', 'd:']

import re
# Patterns are written as raw strings so that `\S` reaches the regex engine
# unchanged; in a normal string literal `\S` is an invalid escape sequence.

# Without a group, findall returns the whole matched text.
re.findall(r'From \S+@\S+', 'From name@domain blah blah')
# ['From name@domain']

# With one capturing group, findall returns only the text inside the group.
re.findall(r'From (\S+@\S+)', 'From name@domain blah blah')
# ['name@domain']

# Greedy and non-greedy `\S+` before the `@` give the same result here,
# because the match still has to extend up to the `@`.
re.findall(r'\S+?@\S+', 'From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008')
# ['stephen.marquard@uct.ac.za']
re.findall(r'\S+@\S+', 'From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008')
# ['stephen.marquard@uct.ac.za']

import re
alphabet = 'abcdefghijklmnopqrstuvwxyz'

# search reports only the first match: here, the first run of consonants.
re.search(r'[^aeiou]+', alphabet)
# <re.Match object; span=(1, 4), match='bcd'>

import re
alphabet = 'abcdefghijklmnopqrstuvwxyz'

# Splitting on each vowel; the leading '' appears because the string
# starts with a separator.
re.split(r'[aeiou]', alphabet)
# ['', 'bcd', 'fgh', 'jklmn', 'pqrst', 'vwxyz']

# Splitting on runs of consonants; the trailing '' appears because the
# string ends with a separator.
re.split(r'[^aeiou]+', alphabet)
# ['a', 'e', 'i', 'o', 'u', '']

# maxsplit=1 performs a single split and leaves the remainder untouched.
re.split(r'[^aeiou]+', alphabet, maxsplit=1)
# ['a', 'efghijklmnopqrstuvwxyz']

import re
alphabet = 'abcdefghijklmnopqrstuvwxyz'

# Replace every vowel with an underscore.
re.sub(r'[aeiou]', "_", alphabet)
# '_bcd_fgh_jklmn_pqrst_vwxyz'

# Replace every consonant individually: one underscore per character.
re.sub(r'[^aeiou]', "_", alphabet)
# 'a___e___i_____o_____u_____'

# With `+`, each run of consonants collapses into a single underscore.
re.sub(r'[^aeiou]+', "_", alphabet)
# 'a_e_i_o_u_'

from pprint import pprint
import re

# Print every name the re module exposes, one per line.
# NOTE: the listing below is a sample; the exact names depend on the Python
# version in use.
pprint(dir(re))
# ['A',
#  'ASCII',
#  'DEBUG',
#  'DOTALL',
#  'I',
#  'IGNORECASE',
#  'L',
#  'LOCALE',
#  'M',
#  'MULTILINE',
#  'Match',
#  'Pattern',
#  'RegexFlag',
#  'S',
#  'Scanner',
#  'T',
#  'TEMPLATE',
#  'U',
#  'UNICODE',
#  'VERBOSE',
#  'X',
#  '_MAXCACHE',
#  '__all__',
#  '__builtins__',
#  '__cached__',
#  '__doc__',
#  '__file__',
#  '__loader__',
#  '__name__',
#  '__package__',
#  '__spec__',
#  '__version__',
#  '_cache',
#  '_compile',
#  '_compile_repl',
#  '_expand',
#  '_locale',
#  '_pickle',
#  '_special_chars_map',
#  '_subx',
#  'compile',
#  'copyreg',
#  'enum',
#  'error',
#  'escape',
#  'findall',
#  'finditer',
#  'fullmatch',
#  'functools',
#  'match',
#  'purge',
#  'search',
#  'split',
#  'sre_compile',
#  'sre_parse',
#  'sub',
#  'subn',
#  'template']